## khandelwal demand regressions

require(data.table)
require(lubridate)
require(lfe)

require(ggplot2)
require(ggthemes)
require(forcats)

require(stargazer)

proper <- function(x){gsub("(?<=\\b)([a-z])", "\\U\\1", tolower(x), perl=TRUE)}

theme_dan <- function(base_size=12){
  	theme_classic(base_size = base_size)+
	theme(axis.line.x = element_line(color='black'), axis.line.y=element_line(color='black')) + 
	theme(legend.position='bottom', legend.title=element_blank())  
}



## ===================================================================
## DATA INPUT
## ===================================================================

## SET DATA DIRECTORY HERE
## setwd(...)

# -- -- --
## main dataset - MONTHLY sales and prices

cat <- fread('aggregate_monthly_panel.csv')
cat$date <- as.Date(cat$date)

## -- -- --
## seasons

smap <- data.table(input=c('', 'FW11', 'FW12', 'FW13', 'FW14', 'FW15', 'OUTLET', 'SS11', 'SS12', 'SS13', 'SS14', 'SS15'), 
		out=as.Date(c(NA,'2011-09-01', '2012-09-01', '2013-09-01', '2014-09-01', '2015-09-01', NA, 
			'2011-03-01', '2012-03-01', '2013-03-01', '2014-03-01', '2015-03-01'),format='%Y-%m-%d'))
cat$season <- smap$out[match(cat$season_1s, smap$input)]
cat <- cat[!is.na(cat$season), ]


## -- -- --
## months since first introduction (for demand regressions)

dates <- sort(unique(cat$date))
cat$t1 <- match(cat$date, dates)
cat[, by='product_sku', 't1' := as.integer(t1 - min(t1) + 1)]

## -- -- --
## materials

mat <- fread('natural_coding.csv')
mat$season <- as.Date(mat$season)
cat <- merge(cat, mat[, -c('haspolymer', 'production_country'), with=F], by=c('product_sku', 'season', 'brand', 'targetgroup'))

## -- -- --
## exchange rate dataset (over long time period, since operate in 6 month increments)

erdat <- fread('USD RUB Historical Data.csv')
setnames(erdat, c('date', 'price', 'open', 'high', 'low', 'change'))
erdat <- erdat[1:4276, ]
erdat$date <- as.Date(erdat$date, format='%b %d, %Y')
erdat$price <- as.numeric(erdat$price)

erdat$mo <- month(erdat$date)
erdat$year <- year(erdat$date)
erdat$seas <- 3*(erdat$mo >= 3 & erdat$mo < 9) + 9*(erdat$mo >= 9 | erdat$mo < 3)
erdat$yearseas <- erdat$year*(erdat$mo >= 3) + (erdat$year - 1)*(erdat$mo < 3)
erdat$season <- as.Date(paste(erdat$yearseas, erdat$seas, '01', sep='-'))

erdatf <- erdat[, by='season', list(usdrubm = mean(price))]
erdatf <- erdatf[order(season), ]

erdatf$usdrubm.l <- c(NA, erdatf$usdrubm[1:(nrow(erdatf)-1)])

## -- -- --
## estimate AR(1) on exchange rate to get long run average for normalization (normalize ER by the long run average)
er1 <- felm(usdrubm ~ usdrubm.l, data=erdatf[season <= as.Date('2014-03-01'), ])
ersd <- sd(er1$residual)

## long run average:
er_lra <- coef(er1)[1]/(1 - coef(er1)[2])

erdatf$usdrubm_nor <- erdatf$usdrubm/er_lra
erdatf$usdrubm_nor.l <- erdatf$usdrubm.l/er_lra




## ===================================================================
## STATIC DEMAND ESTIMATION - CREATE VARIABLES
## ===================================================================

## steps:
##	market size to construct outside option, merge into data
##	demand regressions

## -- -- -- -- -- --
## market size (total number of potential purchases, extrapolated)

msdt <- cat[, by='season', list(orders=sum(orders), sales=sum(sales))]
msdt <- msdt[order(season), ]
msdt$t <- 1:nrow(msdt)

msdt$totpredorders <- floor(exp(felm(log(orders) ~ t, data=msdt)$fitted.values))
msdt$totpredorders.linear <- floor(felm(orders ~ t, data=msdt)$fitted.values)

msdt <- msdt[, c('season', 'orders', 'sales', 'totpredorders', 'totpredorders.linear'), with=F]
setnames(msdt, c('season', 'totorders', 'totsales', 'totpredorders', 'totpredorders.linear'))

## -- -- -- -- -- --
## creating variables for regression

cat$lsales <- log(cat$sales)
cat$lcog <- log(cat$cog)
cat$lsale_price <- log(cat$sale_price)

ccat <- cat[lsales < quantile(cat$lsales, na.rm=T, 0.99) & 
			lsales > -Inf & lsale_price > -Inf & lcog > -Inf &
			!is.na(lsales) & !is.na(lsale_price) & !is.na(lcog) & date <= as.Date('2015-10-01'), ]		##  & t1 %in% 1:3
ccat$tgbs <- paste(ccat$targetgroup, ccat$brand, ccat$season, sep='_')
ccat$tgb <- paste(ccat$targetgroup, ccat$brand, sep='_')

ccat$tgbd <- paste(ccat$targetgroup, ccat$brand, ccat$date, sep="_")
ccat$tgd <- paste(ccat$targetgroup, ccat$date, sep="_")

ccat <- merge(ccat, msdt, by='season')
ccat$share <- ccat$sales/ccat$totpredorders
ccat$lshare <- log(ccat$share)
ccat$y <- log(ccat$sales/ccat$totpredorders) - log(1 - ccat$totsales/ccat$totpredorders)		

## nest shares: 
ccat[, by=.(targetgroup, date), c('nestorders', 'nestsales') := list(sum(orders), sum(sales))]
ccat$nsshare <- ccat$sales/ccat$nestsales
ccat$noshare <- ccat$orders/ccat$nestorders
ccat$lnsshare <- log(ccat$nsshare)
ccat$lnoshare <- log(ccat$noshare)

## normalized sale price, so price coefficient in log-lin specification isn't tiny
ccat$sale_price_nor <- ccat$sale_price/1000
ccat$sale_price_norXnat <- ccat$sale_price_nor*ccat$nat

## instruments are cog_n, and cog_n X nat
ccat$cogXnat <- ccat$cog_n*ccat$nat

## instruments and regressors for when using log sale price
ccat$lsale_priceXnat <- ccat$lsale_price*ccat$nat
ccat$lcogXnat <- ccat$lcog*ccat$nat


## ===================================================================
## DEMAND SPECIFICATIONS
## ===================================================================

ccat_t <- ccat[sale_price < quantile(sale_price,0.99, na.rm=T) & t1 %in% 1:3 & season >= as.Date('2012-03-01') & 
		!is.na(t1) & !is.na(nat) & !is.na(lnoshare) & !is.na(premium_1s) & !is.na(sale_price_nor) & !is.na(cog_n) & 
		!is.na(y), ]


oo_1ni <- felm(y ~ sale_price_nor + sale_price_norXnat + as.factor(t1):nat + as.factor(t1) + nat + nat:rus + rus + lnoshare + premium_1s | tgbs + date | 
		0 | tgbs, data=ccat_t[, ])

oo_1 <- felm(y ~ as.factor(t1):nat + as.factor(t1) + nat + nat:rus + rus + lnoshare + premium_1s | tgbs + date  | 
		(sale_price_nor|sale_price_norXnat ~ cog_n + cogXnat) | tgbs, 
		data=ccat_t[, ])

oo_2ni <- felm(y ~ lsale_price + lsale_priceXnat + as.factor(t1):nat + as.factor(t1) + nat + rus + nat:rus + lnoshare + premium_1s  | tgbs + date |
		0 | tgbs, data=ccat_t[,])

oo_2 <- felm(y ~ as.factor(t1):nat + as.factor(t1) + nat + rus + nat:rus + lnoshare + premium_1s  | tgbs + date |
		(lsale_price|lsale_priceXnat ~ lcog + lcogXnat) | tgbs, 
		data=ccat_t[,])



## ===================================================================
## OUTPUT FOR TABLE A.3: LOGIT DEMAND REGRESSION RESULTS
## ===================================================================

## elasticities, nested logit formula:
##	without log prices: ds/dp = alpha*s_{jt}*(1/(1-sigma) - (sigma/(1-sigma))*ns_{jt} - s_{jt})
##	with log prices: ds/dp = (alpha/p)*...

e_oo_1ni <- (coef(oo_1ni)[1] + coef(oo_1ni)[2]*ccat_t$nat)*
			(1/(1-coef(oo_1ni)[7]) - (coef(oo_1ni)[7]/(1-coef(oo_1ni)[7]))*ccat_t$noshare - ccat_t$share)*ccat_t$sale_price_nor

e_oo_1 <- (coef(oo_1)[10] + coef(oo_1)[11]*ccat_t$nat)*
			(1/(1-coef(oo_1)[5]) - (coef(oo_1)[5]/(1-coef(oo_1)[5]))*ccat_t$noshare - ccat_t$share)*ccat_t$sale_price_nor

e_oo_2ni <- (coef(oo_2ni)[1] + coef(oo_2ni)[2]*ccat_t$nat)*
			(1/(1-coef(oo_2ni)[7]) - (coef(oo_2ni)[7]/(1-coef(oo_2ni)[7]))*ccat_t$noshare - ccat_t$share)

e_oo_2 <- (coef(oo_2)[10] + coef(oo_2)[11]*ccat_t$nat)*
			(1/(1-coef(oo_2)[5]) - (coef(oo_2)[5]/(1-coef(oo_2)[5]))*ccat_t$noshare - ccat_t$share)


stargazer(oo_1ni, oo_1, oo_2ni, oo_2, 
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Mean Elasticity: Low Quality', round(mean(e_oo_1ni[ccat_t$nat == 0]), 2), round(mean(e_oo_1[ccat_t$nat == 0]), 2), 
										round(mean(e_oo_2ni[ccat_t$nat == 0]), 2), round(mean(e_oo_2[ccat_t$nat == 0]), 2)), 
				c('Mean Elasticity: High Quality', round(mean(e_oo_1ni[ccat_t$nat == 1]), 2), round(mean(e_oo_1[ccat_t$nat == 1]), 2), 
										round(mean(e_oo_2ni[ccat_t$nat == 1]), 2), round(mean(e_oo_2[ccat_t$nat == 1]), 2)), 
				c('First-Stage F Stat', NA, round(summary(oo_1)$fstat,0), NA, round(summary(oo_2)$fstat,0)), 
				c('$h_{j}(\\tau)$', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'), 
				c('Group-Brand-Season FE',  '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark'), 
				c('Month FE', '\\checkmark', '\\checkmark', '\\checkmark', '\\checkmark')))



## ===================================================================
## RECOVER QUALITY RESIDUALS
## ===================================================================

recQualities <- function(reg){
	## function that recovers qualities for regressions that look like oo_1

	fedt <- data.table(idx=getfe(reg)$idx, type=getfe(reg)$fe, val=getfe(reg)$effect)
	fedt1 <- fedt[type == 'tgbs', c('idx', 'val'), with=F]
	fedt2 <- fedt[type == 'date', c('idx', 'val'), with=F]
	setnames(fedt1, c('tgbs', 'fe.tgbs'))
	setnames(fedt2, c('date', 'fe.date'))
	fedt2$date <- as.Date(fedt2$date)

	ccat2 <- merge(ccat_t, fedt1, by='tgbs')
	ccat2 <- merge(ccat2, fedt2, by='date')
	ccat2$fe.res <- reg$iv.residuals
	ccat2$fe_c <- ccat2$fe.tgbs + 
		ccat2$nat*coef(reg)[3] + ccat2$rus*coef(reg)[4] + ccat2$premium_1s*coef(reg)[6] + ccat2$nat*ccat2$rus*coef(reg)[6]

	ccat2 <- ccat2[, by='product_sku', list(targetgroup=targetgroup[1], brand=brand[1], tgb=tgb[1], tgbs=tgbs[1],
							fe_c=fe_c[1], 
							season=season[1], nat=nat[1], premium_1s=premium_1s[1], 
							rus=rus[1])]
}

fedt1 <- recQualities(oo_1)
fedt2 <- recQualities(oo_2)

fedt1 <- fedt1[, .(product_sku, targetgroup, season, fe_c)]					## fe_tv_w, fe_tv, 
fedt2 <- fedt2[, .(product_sku, targetgroup, season, fe_c)]					## fe_tv_w, fe_tv, 

names(fedt1)[4] <- c('linfe_c')									## 'linfe_tv_w', 'linfe_tv', 
names(fedt2)[4] <- c('logfe_c')									## 'logfe_tv_w', 'logfe_tv', 

fedt_all <- merge(fedt1, fedt2, by=c('product_sku', 'targetgroup', 'season'))

## writes to initially set data folder
fwrite(fedt_all, 'khandelwal_residuals.csv', row.names=F)



## ===================================================================
## PRODUCT GROUP SPECIFIC QUALITY SHIFTER ESTIMATES
## ===================================================================


targetgroups <- sort(unique(ccat$targetgroup))
fe_l <- lapply(1:length(targetgroups), function(x) '')
for (i in 1:length(targetgroups)){
	fe_l[[i]] <- felm(y ~ as.factor(t1):nat + as.factor(t1) + nat + lnoshare + premium_1s  | tgbs + date |
		(lsale_price ~ lcog) | tgbd, 
		data=ccat_t[targetgroup == targetgroups[i] & season <= as.Date('2015-09-01'), ])

}

tt <- data.table(tg = targetgroups, nat=sapply(1:length(targetgroups), function(x) coef(fe_l[[x]])[3]), 
			natsig = sapply(1:length(targetgroups), function(x) fe_l[[x]]$cpval[3]), 
			sig_l=sapply(1:length(targetgroups), function(x) coef(fe_l[[x]])[8]), 
			sig_hd=sapply(1:length(targetgroups), function(x) coef(fe_l[[x]])[9])) 
tt$natsig <- ifelse(tt$natsig <= 0.05, 'p<0.05', ifelse(tt$natsig <= 0.1, 'p<0.1', 'Not Significant'))

tt <- tt[order(-nat), ]
tt$nat <- tt$nat
tt$tg <- factor(tt$tg, levels=tt$tg)


## positive and significant: possigtgs <- c('UNDERWEAR', 'HIGH BOOTS', 'HEELED SANDALS', 'MOCCASINS & ESPADRILLES', 'SHOES',
##						'SHIRTS', 'BALLERINA SHOES', 'SANDALS', 'HEADWEAR', 'TROUSERS & JUMPSUITS', 'DRESSES', 'KNITWEAR')
## positive: postgs <- c('UNDERWEAR', 'HIGH BOOTS', 'HEELED SANDALS', 'MOCCASINS & ESPADRILLES', 'SHOES', 'ANKLE BOOTS',
##						'SHIRTS', 'BALLERINA SHOES', 'SANDALS', 'HEADWEAR', 'TROUSERS & JUMPSUITS', 'BOOTS',
##						'DRESSES', 'KNITWEAR', 'TEE-SHIRTS & POLOS', 'SHORTS', 'SPORT SHOES', 'SCARVES', 'BAGS')
					
rp <- ggplot(data=tt, aes(x=tg, y=nat, group=natsig, shape = natsig)) + geom_point() + theme_dan() + 
		geom_hline(yintercept = 0, color='red', linetype='dashed') + scale_shape_manual(values=c(1,3,4)) + 
		theme(axis.title.x = element_blank(), axis.text.x = element_text(angle=45,hjust=1)) +
		ylab('Quality shifter coefficient')


## set output write directory here
ggsave('robustness_het_khandelwal_quality_shifter.pdf', rp, height=4, width=6)
dev.off()



##